Property Sales Price Trend
Property Sales Price Percentage Increase
Sales Price vs Borough
Sales Price vs Land
Sales Price vs Tax Class
Sales Price vs Year Built
Sales Price vs Neighbourhood
Number of Schools per borough
Number of Schools per Student Population in each borough
Average SAT Scores of each borough
Clustered Marker Map of Schools in NYC
Number of Rat Sightings per Borough
Number of Rat Sightings per area in each borough
Heatmap of Rat Sightings in NYC
Population Growth Grouped Barplot per borough
Number of Crimes Per Year in all boroughs
Number of Crimes per population in all boroughs
Number of Crimes per Area in all boroughs
Proportion of Types of crimes
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import folium
import os
import branca.colormap
from collections import defaultdict
import geopandas as gpd
from shapely.geometry import Point, Polygon
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap
import branca.colormap as cm
import gmaps
from statsmodels.formula.api import ols
warnings.filterwarnings("ignore")
# Cleaned CSV exports; the position of each name in this list is the index used
# to look the table up in `visualisation_dfs` later in the script.
filenames = ['df_2016to2019sales.csv', 'df_yearly_summary.csv', 'df_crime.csv', 'mean_crime.csv',
             'df_schooldata.csv', 'df_rat.csv', 'df_ratperland.csv', 'schools_land.csv', 'population.csv']
# Load every file into a DataFrame, keeping the order of `filenames`.
visualisation_dfs = [
    pd.read_csv('C:/Users/ChengYao/Desktop/DAO2702/Project/Clean Data/' + name)
    for name in filenames
]
def plot_lines_years(data, boroughs, years, title=''):
    """Draw one line per borough of the yearly mean 'AVERAGE SALE PRICE'.

    Parameters
    ----------
    data : DataFrame with 'BOROUGH', 'YEAR' and 'AVERAGE SALE PRICE' columns.
    boroughs : iterable of borough names; each one must occur in
        ``data['BOROUGH']`` (a missing name raises ``KeyError``).
    years : sequence of years used as the x-axis tick positions and labels.
    title : text appended to the fixed "Average Sale Price from 2007 - 2019"
        heading.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    sns.set(style='darkgrid', font_scale=1.3, font="calibri",
            rc={'axes.facecolor': '#d8d8d8'})
    years_str = list(map(str, years))  # tick labels as strings

    # Aggregate once, and only over the plotted column. Averaging every column
    # would also hit text columns such as 'BUILDING CLASS CATEGORY', which
    # raises TypeError on pandas >= 2.0, and redoing the groupby for each
    # borough is wasted work.
    mean_price = data.groupby(['BOROUGH', 'YEAR'])['AVERAGE SALE PRICE'].mean()

    plt.figure(figsize=(15, 10))
    for borough in boroughs:
        sns.lineplot(y='AVERAGE SALE PRICE',
                     x='YEAR',
                     data=mean_price.loc[borough].reset_index(),
                     label=borough,
                     lw=3, marker='o', ms=10)
    plt.title("Average Sale Price from 2007 - 2019 {}".format(title),
              pad=10, fontsize=18)
    plt.xticks(years, years_str)  # numeric positions, string labels
    plt.show()
# Yearly summary table and the building categories it contains, in order of
# first appearance.
yearlysummarydata = visualisation_dfs[1]
buildingcategories = yearlysummarydata['BUILDING CLASS CATEGORY'].unique()

# One subset per category for the first three categories.
# NOTE(review): the one/two/three-family naming assumes the categories appear
# in that order in the CSV - confirm against the data.
onehomedata, twohomedata, threehomedata = (
    yearlysummarydata[yearlysummarydata['BUILDING CLASS CATEGORY'] == buildingcategories[position]]
    for position in range(3)
)

boroughs = yearlysummarydata['BOROUGH'].unique()
years = yearlysummarydata['YEAR'].unique()

# Same line chart for each subset, then for the whole table.
for subset, heading in (
        (onehomedata, 'of One Family Homes'),
        (twohomedata, "of Two Family Homes"),
        (threehomedata, "of Three Family Homes"),
        (yearlysummarydata, 'of each Borough'),
):
    plot_lines_years(subset, boroughs, years, title=heading)
def plotdouble(borough, barcolor, leftcolor, linecolor):
    """Plot yearly mean sale price (bars) and percentage increase (line).

    Parameters
    ----------
    borough : value matched against the 'BOROUGH' column of
        ``visualisation_dfs[1]``; also inserted into the figure title.
    barcolor : colormap / palette name used for the bars and the colorbar.
    leftcolor : colour of the left (sale price) y-axis label.
    linecolor : colour of the percentage line and the right y-axis label.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    summary = visualisation_dfs[1]
    borough_rows = summary.loc[summary['BOROUGH'] == borough]
    # Average only the two plotted columns. A blanket .mean() would also try to
    # average the text columns ('BOROUGH', 'BUILDING CLASS CATEGORY'), which
    # raises TypeError on pandas >= 2.0.
    data1 = (borough_rows
             .groupby('YEAR')[['AVERAGE SALE PRICE', '% INCREASE']]
             .mean()
             .reset_index())
    data1['% INCREASE'] = data1['% INCREASE'] * 100  # multiply by 100 for display

    # Colour scale for the colorbar, spanning the plotted price range.
    norm = plt.Normalize(data1['AVERAGE SALE PRICE'].min(),
                         data1['AVERAGE SALE PRICE'].max())
    sm = plt.cm.ScalarMappable(cmap=barcolor, norm=norm)
    sm.set_array([])

    sns.set(style='white', font_scale=1.3, font="calibri",
            rc={'axes.facecolor': '#d8d8d8'})
    fig, ax1 = plt.subplots(figsize=(15, 10))
    sns.barplot(x='YEAR', y='AVERAGE SALE PRICE', data=data1,
                hue='AVERAGE SALE PRICE', dodge=False, palette=barcolor,
                ax=ax1, ci=None)
    ax2 = ax1.twinx()
    # The line is drawn against the row index (0..n-1) rather than the year
    # values, which is where the categorical bar plot places its bars.
    sns.lineplot(x=data1['YEAR'].index, y=data1['% INCREASE'],
                 ax=ax2, color=linecolor)
    ax1.get_legend().remove()  # the colorbar replaces the per-bar hue legend
    ax1.figure.colorbar(sm, pad=0.1, fraction=0.046)
    ax1.set_xlabel('Year', fontsize=18)
    ax1.set_ylabel('Average Sale Price', fontsize=18, color=leftcolor)
    ax2.set_ylabel('Percentage Change', fontsize=18, color=linecolor)
    plt.title('Average Sale Price & Percentage Increase of ' + borough + ' from 2007 to 2019',
              fontsize=20)
    plt.show()
# One combined bar/line figure per borough:
# (borough, bar palette, left-axis label colour, line colour).
for borough_style in (
        ('Bronx', 'Reds', 'Red', 'Black'),
        ('Brooklyn', 'Blues', 'Blue', 'Red'),
        ('Queens', 'Greens', 'Green', 'Red'),
        ('Staten Island', 'Purples', 'Purple', 'Red'),
        ('Manhattan', 'Greys', 'Grey', 'Red'),
):
    plotdouble(*borough_style)
# Individual sales records (first CSV in the load list).
df_4yearscombined = visualisation_dfs[0]

sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, BOROUGH) frame per year, in the order of `years`.
data_year_wise = [
    df_4yearscombined.loc[df_4yearscombined['YEAR'] == year, ['SALE PRICE', 'BOROUGH']]
    for year in years
]
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: one panel per year showing the mean sale price per borough.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.barplot(x='BOROUGH', y='SALE PRICE', data=yearly_sales,
                ax=panel, estimator=np.mean, ci=None)
    panel.title.set_text(str(year))
    panel.set_ylabel('Mean Sale Price')
    panel.set_xlabel('Borough')
fig.tight_layout(pad=2.0)
plt.suptitle("Mean Sale Price vs Borough", fontsize=20, y=1.01)
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, BOROUGH) frame per year, in the order of `years`.
data_year_wise = [
    df_4yearscombined.loc[df_4yearscombined['YEAR'] == year, ['SALE PRICE', 'BOROUGH']]
    for year in years
]
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: sale price distribution per borough, one panel per year.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.boxplot(x='BOROUGH', y='SALE PRICE', data=yearly_sales, ax=panel)
    panel.title.set_text(str(year))
    panel.set_ylabel('Sale Price')
    panel.set_xlabel('Borough')
fig.tight_layout(pad=2.0)
plt.suptitle("Sale Price vs Borough", fontsize=20, y=1.01)
plt.show()
# Summary statistics of SALE PRICE per borough, one column per borough.
# Built straight from a dict of `describe()` Series, so the column order is
# fixed by `summary_order` and no empty DataFrame is needed as a concat seed.
summary_order = ['Bronx', 'Brooklyn', 'Queens', 'Staten Island', 'Manhattan']
borough_summaries = {
    name: df_4yearscombined.loc[df_4yearscombined['BOROUGH'] == name, 'SALE PRICE'].describe()
    for name in summary_order
}
# Keep the per-borough names that the original cell defined.
bronx, brooklyn, queens, statenisland, manhattan = (
    borough_summaries[name] for name in summary_order
)
df10 = pd.DataFrame(borough_summaries)
df10
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
# Keep only sales strictly below 9,600,000 before plotting.
data1 = df_4yearscombined[df_4yearscombined["SALE PRICE"] < 9600000]
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, BOROUGH) frame per year from the filtered data.
data_year_wise = [
    data1.loc[data1['YEAR'] == year, ['SALE PRICE', 'BOROUGH']]
    for year in years
]
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: filtered sale price distribution per borough, one panel per year.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.boxplot(x='BOROUGH', y='SALE PRICE', data=yearly_sales, ax=panel)
    panel.title.set_text(str(year))
    panel.set_ylabel('Sale Price')
    panel.set_xlabel('Borough')
fig.tight_layout(pad=2.0)
plt.suptitle("Sale Price vs Borough", fontsize=20, y=1.01)
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
# Manhattan rows only.
is_manhattan = df_4yearscombined['BOROUGH'].isin(['Manhattan'])
data2 = df_4yearscombined[is_manhattan]
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, BOROUGH) frame per year from the Manhattan subset.
data_year_wise = [
    data2.loc[data2['YEAR'] == year, ['SALE PRICE', 'BOROUGH']]
    for year in years
]
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: Manhattan sale price distribution, one panel per year.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.boxplot(x='BOROUGH', y='SALE PRICE', data=yearly_sales, ax=panel)
    panel.title.set_text(str(year))
    panel.set_ylabel('Sale Price')
    panel.set_xlabel('Borough')
fig.tight_layout(pad=2.0)
plt.suptitle("Sale Price vs Borough", fontsize=20, y=1.01)
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = [2016, 2017, 2018, 2019]
colors_ = ['#cd025c', '#f5ab60', '#743fc5', '#243e1f']  # one colour per year panel
# One (SALE PRICE, LAND SQUARE FEET) frame per year, in the order of `years`.
data_year_wise = [
    df_4yearscombined.loc[df_4yearscombined['YEAR'] == year, ['SALE PRICE', 'LAND SQUARE FEET']]
    for year in years
]
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: scatter with regression fit of price against land area, per year.
for year, shade, yearly_sales, panel in zip(years, colors_, data_year_wise, ax.ravel()):
    sns.regplot(x='LAND SQUARE FEET', y='SALE PRICE', data=yearly_sales,
                ax=panel, color=shade)
    panel.title.set_text(str(year))
    panel.set_ylabel('Sale Price')
    panel.set_xlabel('LAND SQUARE FEET')
fig.tight_layout(pad=2.0)
plt.suptitle("Sale Price vs Land Square Feet", fontsize=20, y=1.01)
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, TAX CLASS) frame per year, in the order of `years`.
data_year_wise = [
    df_4yearscombined.loc[df_4yearscombined['YEAR'] == year, ['SALE PRICE', 'TAX CLASS']]
    for year in years
]
tax_class_order = ['1', '1B', '2A', '2B', '2', '4']  # fixed left-to-right bar order
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: mean sale price per tax class, one panel per year.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.barplot(x='TAX CLASS', y='SALE PRICE', data=yearly_sales, ax=panel,
                order=tax_class_order, estimator=np.mean, ci=None)
    panel.title.set_text(str(year))
    panel.set_ylabel('Mean Sale Price')
    panel.set_xlabel('Tax Class')
fig.tight_layout(pad=2.0)
plt.suptitle("Mean Sale Price vs Tax Class", fontsize=20, y=1.01)
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = [2016, 2017, 2018, 2019]
# One (SALE PRICE, YEAR BUILT (Bins)) frame per year, in the order of `years`.
data_year_wise = [
    df_4yearscombined.loc[df_4yearscombined['YEAR'] == year, ['SALE PRICE', 'YEAR BUILT (Bins)']]
    for year in years
]
# Bins listed oldest to newest so the bars read chronologically.
bin_order = ['< 1900', '1900 - 1924', '1925 - 1949', '1950 - 1974', '1975 - 1999', '2000 - 2019']
fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(15, 10))
# 2x2 grid: mean sale price per construction-year bin, one panel per year.
for year, yearly_sales, panel in zip(years, data_year_wise, ax.ravel()):
    sns.barplot(x='YEAR BUILT (Bins)', y='SALE PRICE', data=yearly_sales,
                order=bin_order, ax=panel, estimator=np.mean, ci=None)
    panel.title.set_text(str(year))
    panel.set_ylabel('Mean Sale Price', labelpad=10)
    panel.set_xlabel('Year Built (Bins)', labelpad=10)
    panel.set_xticklabels(panel.get_xticklabels(), rotation=15)
fig.tight_layout(pad=2.0)
plt.suptitle("Mean Sale Price vs Year Built (Bins)", fontsize=20, y=1.01)
plt.show()
# 2019 sales only, reduced to the columns used below and ordered by borough.
neighbourhood = (
    df_4yearscombined
    .loc[df_4yearscombined['YEAR'] == 2019, ['SALE PRICE', 'YEAR', 'NEIGHBORHOOD', 'BOROUGH']]
    .sort_values(['BOROUGH'], ascending=True)
)
# unique() keeps first-appearance order, so after the sort above the boroughs
# come out alphabetically.
boroughs = neighbourhood['BOROUGH'].unique()
# One (SALE PRICE, NEIGHBORHOOD) frame per borough, in the order of `boroughs`.
data_borough_wise = [
    neighbourhood.loc[neighbourhood['BOROUGH'] == borough_name, ['SALE PRICE', 'NEIGHBORHOOD']]
    for borough_name in boroughs
]
def neighbourhood_plot(data, i, title):
    """Bar plot of mean sale price per neighbourhood for one borough.

    Parameters
    ----------
    data : sequence of DataFrames, each with 'NEIGHBORHOOD' and 'SALE PRICE'
        columns.
    i : position in ``data`` of the frame to plot.
    title : borough name used in the figure title and in the legend entry.

    A horizontal line marks the mean sale price over all rows of ``data[i]``.
    The figure is shown with ``plt.show()``; nothing is returned.
    """
    sns.set(style='darkgrid', font_scale=1.3, font="calibri",
            rc={'axes.facecolor': '#d8d8d8'})
    borough_sales = data[i]
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(15, 10))
    # Draw on the axes created above instead of relying on the implicit
    # "current axes" state.
    sns.barplot(x='NEIGHBORHOOD',
                y='SALE PRICE',
                data=borough_sales,
                estimator=np.mean,
                ci=None,
                ax=ax)
    ax.set_ylabel('Mean Sale Price', labelpad=10)
    ax.set_xlabel('Neighbourhoods', labelpad=10)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontsize=14)
    fig.tight_layout(pad=2.0)
    plt.suptitle("Mean Sale Price vs Neighbourhoods in " + title, fontsize=20, y=1.01)
    meanvalue = borough_sales['SALE PRICE'].mean()
    # Attach the label to the line itself. Passing a bare label list to
    # legend() pairs labels with artists purely by drawing order.
    ax.axhline(y=meanvalue, label='Mean Sale Price in ' + title)
    ax.legend()
    plt.show()
# One neighbourhood figure per borough. The titles are hard-coded in the same
# alphabetical order that `data_borough_wise` was built in.
for position, borough_title in enumerate(
        ["Bronx", "Brooklyn", "Manhattan", "Queens", "Staten Island"]):
    neighbourhood_plot(data_borough_wise, position, borough_title)
# Schools-per-borough table; the density column is added to it in place.
highschooldata = visualisation_dfs[7]
highschooldata['Schools per Land Size'] = (
    highschooldata['Number of Schools'].div(highschooldata['Land Size'])
)
highschooldata

# Two bar charts over the same table: raw school count, then school density.
for measure, heading in (
        ('Number of Schools',
         "Barplot showing number of public high schools for each Borough"),
        ('Schools per Land Size',
         "Barplot showing Schools per km^2 for each Borough"),
):
    plt.figure(figsize=(15, 8))
    sns.barplot(x='Borough', y=measure, data=highschooldata)
    plt.title(heading, pad=10, fontsize=20)
    plt.show()
# Population table; keep the school-age rows and their 2020 figures.
# The 'Age Group' label is matched exactly, including its spacing.
pop = visualisation_dfs[8]
pop1 = pop.loc[pop['Age Group'] == 'School-Age (5 to17) ', ['Borough', '2020']]
pop1 = pop1.rename(columns={'2020': 'Student Population'})

# Attach the student population to the school table and derive the ratio.
highschooldata = highschooldata.merge(pop1, on='Borough')
highschooldata['Schools per Student Population'] = (
    highschooldata['Number of Schools'].div(highschooldata['Student Population'])
)
highschooldata

plt.figure(figsize=(15, 8))
sns.barplot(x='Borough', y='Schools per Student Population', data=highschooldata)
plt.title("Barplot showing Schools per Student Population for each Borough",
          pad=10, fontsize=20)
plt.show()
# Distribution of 'Average SAT Score' per borough from the school table.
satscores = visualisation_dfs[4]
sat_borough_order = ['Bronx', 'Brooklyn', 'Manhattan', 'Queens', 'Staten Island']
plt.figure(figsize=(15, 10))
sns.boxplot(data=satscores,
            x='Borough',
            y='Average SAT Score',
            order=sat_borough_order,
            palette='Paired')
plt.title("Average SAT scores in each Borough", pad=10, fontsize=20)
plt.show()
# School coordinates and names; rows with any missing value are dropped so every
# marker has a position and popup text.
lat_long = visualisation_dfs[4]
lat_long = lat_long[['Latitude', 'Longitude', 'School Name', 'Borough']]
lat_long = lat_long.dropna()
borough_poly = gpd.read_file("C:/Users/ChengYao/Desktop/DAO2702/Project/Clean Data/Borough Boundaries.geojson")
lat_long.head(3)

map_markers = folium.Map(location=[40.75321, -73.99786], zoom_start=10)
marker_cluster_school = MarkerCluster().add_to(map_markers)
# One clustered marker per school. The popup shows the school name and, on a
# second line, the borough in bold. The bold tag is now closed with </b>;
# the original emitted a second opening <b> instead.
for latitude, longitude, school_name, school_borough in lat_long.itertuples(index=False, name=None):
    folium.Marker([latitude, longitude],
                  popup=school_name + '<br>' + '<b>' + school_borough + '</b>')\
        .add_to(marker_cluster_school)

# Overlay the first five geometries from the boundary file.
for boundary_idx in range(5):
    folium.GeoJson(borough_poly['geometry'][boundary_idx]).add_to(map_markers)
map_markers
# Number of rows (sightings) per borough in the rat table.
rats = visualisation_dfs[5]
sighting_counts = rats['Borough'].value_counts()
rats1 = sighting_counts.reset_index()
rats1.columns = ['Borough', 'Total Number of Rat Sightings']

plt.figure(figsize=(15, 8))
sns.barplot(data=rats1,
            x='Borough',
            y='Total Number of Rat Sightings',
            palette='Set2')
plt.title("Barplot showing Total Number of Rat Sightings for each Borough",
          pad=10, fontsize=20)
plt.show()
# Precomputed sightings-per-area table, plotted in a fixed borough order.
rats2 = visualisation_dfs[6]
density_order = ['Brooklyn', 'Manhattan', 'Bronx', 'Queens', 'Staten Island']
plt.figure(figsize=(15, 8))
sns.barplot(data=rats2,
            x='Borough',
            y='Average No. of Sightings/km^2',
            order=density_order,
            palette='husl')
plt.title("Barplot showing Average No. of Sightings per km^2 for each Borough",
          pad=10, fontsize=20)
plt.show()
m = folium.Map([40.75321, -73.99786], zoom_start=10)
# Sighting coordinates, with rows lacking a latitude or longitude removed.
heat_arr = rats.dropna(subset=['Latitude', 'Longitude'])
heat_arr = heat_arr[['Latitude', 'Longitude']]

steps = 20
colormap = branca.colormap.linear.YlOrRd_09.scale(0, 1).to_step(steps)
# Gradient stops for the heat layer: position in [0, 1) -> hex colour taken
# from the stepped colormap. A plain dict is enough here; the original
# defaultdict(dict) never used its default factory.
gradient_map = {1/steps*i: colormap.rgb_hex_str(1/steps*i) for i in range(steps)}
colormap.add_to(m)  # add the colour bar to the map

# Plot the heat map. add_child is the supported spelling; add_children is the
# deprecated alias for the same call.
m.add_child(HeatMap(heat_arr, radius=9, gradient=gradient_map))

# Overlay the first five geometries from the boundary file.
for boundary_idx in range(5):
    folium.GeoJson(borough_poly['geometry'][boundary_idx]).add_to(m)
m
# The Google Maps API key is a credential and must not live in the source file.
# It is read from the GOOGLE_MAPS_API_KEY environment variable instead.
# NOTE(review): the key that was previously committed here should be treated
# as leaked and revoked.
gmaps_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not gmaps_api_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable before drawing the gmaps heat map.'
    )
gmaps.configure(api_key=gmaps_api_key)
m1 = gmaps.figure(center=(40.730610, -73.935242), zoom_level=10)
m1.add_layer(gmaps.heatmap_layer(heat_arr))  # same coordinates as the folium map
m1
# Population table reduced to columns 0 and 1 plus every second column from
# position 2 to 8.
pop = visualisation_dfs[8]
kept_positions = [0, 1] + list(range(2, 9, 2))
pop1 = pop.iloc[:, kept_positions]
def plotpopulation(lst, j, title=''):
    """Draw a grouped bar chart of three population series on one axes.

    Parameters
    ----------
    lst : three row positions into the module-level ``pop1`` frame, in the
        order total population, student population, elderly population.
    j : matplotlib axes to draw on.
    title : axes title.

    The bar heights are the values of each row from column position 2
    onwards. Nothing is returned.
    """
    barWidth = 0.25
    series_styles = (
        ('#6497b1', 'Total Population'),
        ('#35b899', 'Student Population (5 - 17)'),
        ('#b3c0e2', 'Elderly ( >= 65 )'),
    )
    group_starts = np.arange(pop1.shape[1] - 2)  # one group per plotted column
    for offset, (row, (color, label)) in enumerate(zip(lst, series_styles)):
        heights = pop1.iloc[row, 2:].tolist()
        j.bar(group_starts + offset * barWidth, heights, color=color,
              width=barWidth, edgecolor='white', label=label)
    j.title.set_text(title)
    j.set_xlabel('Years')
    j.set_ylabel('Number in Population')
    # Pin the ticks to the middle bar of each group before labelling them.
    # The original labelled whatever ticks the auto-locator produced, using a
    # 'dummy' first entry and space-padded strings to line the text up.
    j.set_xticks(group_starts + barWidth)
    j.set_xticklabels(['2010', '2020', '2030', '2040'])
# 2x3 grid with the last cell removed: five panels, one per borough.
fig, ax = plt.subplots(ncols=3, nrows=2, figsize=(15, 10))
fig.delaxes(ax[1, 2])
ax = ax.reshape(-1)

# Row positions in `pop1` for each borough: total, student, elderly.
bronx = [1, 7, 13]
brooklyn = [2, 8, 14]
manhattan = [3, 9, 15]
queens = [4, 10, 16]
statenisland = [5, 11, 17]
total = [bronx, brooklyn, manhattan, queens, statenisland]
boroughslst = boroughs.tolist()  # borough names computed earlier in the script

for panel_idx in range(len(total)):
    plotpopulation(total[panel_idx], ax[panel_idx], title=boroughslst[panel_idx])

plt.suptitle("Grouped Barplot showing Population Growth from 2010 to 2040", fontsize=20)
plt.tight_layout()
plt.subplots_adjust(top=0.9)
# Single shared legend, anchored to the right of the fifth panel where the
# deleted axes used to be.
ax[4].legend(loc='center left', bbox_to_anchor=(1.2, 0.5), prop={'size': 20})
plt.show()
crimes = visualisation_dfs[2]
meancrimes = visualisation_dfs[3]
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = crimes['Year'].unique()
years_str = list(map(str, years))

# Aggregate once and only over the plotted column, instead of re-running a
# whole-frame groupby for every borough. Per the original author's note each
# borough/year has a single row, so the mean just returns that value.
index_total_by_year = crimes.groupby(['Borough', 'Year'])['Index Total'].mean()

plt.figure(figsize=(15, 10))
for borough in boroughs:
    sns.lineplot(y='Index Total', x='Year',
                 data=index_total_by_year.loc[borough].reset_index(),
                 label=borough, lw=3, marker='o', ms=10)
plt.title('Total Number of Crimes from 1990 to 2018 in all 5 Boroughs ', pad=10, fontsize=18)
plt.xticks(years, years_str, rotation=45)  # numeric positions, string labels
plt.ylabel('Total Number of Crimes')
plt.show()
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = crimes['Year'].unique()
years_str = list(map(str, years))

# Aggregate once and only over the plotted column, instead of re-running a
# whole-frame groupby for every borough. Per the original author's note each
# borough/year has a single row, so the mean just returns that value.
mean_crime_by_year = meancrimes.groupby(['Borough', 'Year'])['Mean No. of Crime'].mean()

plt.figure(figsize=(15, 10))
for borough in boroughs:
    sns.lineplot(y='Mean No. of Crime', x='Year',
                 data=mean_crime_by_year.loc[borough].reset_index(),
                 label=borough, lw=3, marker='o', ms=10)
plt.title('Number of Crimes per Population from 1990 to 2018 in all 5 Boroughs ', pad=10, fontsize=18)
plt.xticks(years, years_str, rotation=45)  # numeric positions, string labels
plt.ylabel('Number of Crimes per Population')
plt.show()
# Attach each borough's land size to the crime table.
landsize = highschooldata[['Borough', 'Land Size']]
meancrimes2 = pd.merge(crimes, landsize, on='Borough')
# .copy() makes this an independent frame, so the column assignment below
# is not a chained assignment on a slice of `meancrimes2`.
meancrimes3 = meancrimes2[['Borough', 'Index Total', 'Land Size', 'Year']].copy()
meancrimes3['No. of Crimes per km^2'] = meancrimes3['Index Total'] / meancrimes3['Land Size']

sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
years = crimes['Year'].unique()
years_str = list(map(str, years))

# Aggregate once and only over the plotted column, instead of re-running a
# whole-frame groupby for every borough. Per the original author's note each
# borough/year has a single row, so the mean just returns that value.
crimes_per_area_by_year = meancrimes3.groupby(['Borough', 'Year'])['No. of Crimes per km^2'].mean()

plt.figure(figsize=(15, 10))
for borough in boroughs:
    sns.lineplot(y='No. of Crimes per km^2', x='Year',
                 data=crimes_per_area_by_year.loc[borough].reset_index(),
                 label=borough, lw=3, marker='o', ms=10)
plt.title('Number of Crimes per km^2 from 1990 to 2018 in all 5 Boroughs ', pad=10, fontsize=18)
plt.xticks(years, years_str, rotation=45)  # numeric positions, string labels
plt.ylabel('Number of Crimes per km^2')
plt.show()
# 2018 rows only. The reset_index result is now kept; the original called it
# without assigning, which left the index untouched.
crimes2018 = crimes.loc[crimes['Year'] == 2018].reset_index(drop=True)
boroughnamecrimes = crimes2018['Borough'].unique()
crimes2018list = []
# Crime-type names are the headers of column positions 6..13.
typeofcrimes = crimes2018.columns.tolist()[6:14]

# One small frame per row for the first five rows: count per crime type, the
# row's value from column position 4 (stored as 'Index Total'), and the
# share of that total.
for row in range(5):
    counts = crimes2018.iloc[row, 6:14].tolist()
    indextotal = [crimes2018.iloc[row, 4].tolist()] * 8
    df = pd.DataFrame(list(zip(counts, typeofcrimes, indextotal)),
                      columns=['Number of Crimes', 'Type of Crime', 'Index Total'])
    df['Proportion of Type of Crime'] = df['Number of Crimes'] / df['Index Total']
    # Drop the fifth crime-type row.
    # NOTE(review): presumably a subtotal or duplicate column - confirm
    # against the CSV header.
    df = df.drop(4, axis=0).reset_index(drop=True)
    crimes2018list.append(df)
crimes2018list[0]
def crimetype(data, boroughdata):
    """Plot crime-type proportions for two boroughs side by side.

    Parameters
    ----------
    data : sequence of two DataFrames, each with 'Type of Crime' and
        'Proportion of Type of Crime' columns.
    boroughdata : sequence of two titles, one per panel, in the same order.

    The figure is shown with ``plt.show()``; nothing is returned.
    """
    sns.set(style='darkgrid', font_scale=1.3, font="calibri",
            rc={'axes.facecolor': '#d8d8d8'})
    fig, ax = plt.subplots(ncols=2, nrows=1, figsize=(15, 10))
    for i, j in enumerate(ax.reshape(-1)):
        sns.barplot(x='Type of Crime',
                    y='Proportion of Type of Crime',
                    data=data[i], palette='Set2', ax=j)
        j.set_xticklabels(j.get_xticklabels(), rotation=70)
        j.set_title(boroughdata[i], fontsize=20)
        # Labels below fix the "Proprtion" misspelling shown on the figures.
        j.set_ylabel('Proportion of Crimes', fontsize=20)
        j.set_xlabel('Type of Crimes', fontsize=20)
    fig.tight_layout(pad=2.0)
    plt.suptitle("Proportion of Different Types of Crimes In 2018", fontsize=20, y=1.01)
    plt.show()
# First four boroughs in two side-by-side figures.
crimetype(crimes2018list[0:2], boroughnamecrimes[0:2])
crimetype(crimes2018list[2:4], boroughnamecrimes[2:4])

# The fifth frame gets its own single-panel figure, titled 'Staten Island'.
# Labels below fix the "Proprtion" misspelling shown on the figure.
sns.set(style='darkgrid', font_scale=1.3, font="calibri",
        rc={'axes.facecolor': '#d8d8d8'})
plt.figure(figsize=(8, 9))
sns.barplot(x='Type of Crime', y='Proportion of Type of Crime',
            data=crimes2018list[4], palette='Set2')
plt.title('Staten Island', fontsize=18)
plt.suptitle("Proportion of Different Types of Crimes In 2018", fontsize=20, y=1.01)
plt.xticks(rotation=70)
plt.xlabel('Type of Crimes', fontsize=20)
plt.ylabel('Proportion of Crimes', fontsize=20)
plt.show()